library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
Data source:
https://www.kaggle.com/datasets/cdc/national-health-and-nutrition-examination-survey?select=labs.csv
Details:
https://www.cdc.gov/Nchs/Nhanes/about_nhanes.htm
demographic <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/demographic.xlsx"))
demographiDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/demographic.xlsx",sheet = "Sheet1"))
rownames(demographic) <- demographic$SEQN
demographic$SEQN <- NULL
vartokeep <- demographiDesc[demographiDesc$Keep=="Yes",1]
demographic <- demographic[,vartokeep]
keepColumn <- apply(!is.na(demographic),2,sum)/nrow(demographic) > 0.75
demographic <- demographic[,keepColumn]
demographic <- demographic[complete.cases(demographic),]
## Examination data
examination <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/examination.xlsx"))
examinationDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/examination.xlsx",sheet = "Sheet1"))
rownames(examination) <- examination$SEQN
examination$SEQN <- NULL
bpresDI <- examination[,c("BPXDI1","BPXDI2","BPXDI3","BPXDI4")]
examination[,c("BPXDI1","BPXDI2","BPXDI3","BPXDI4")] <- NULL
bpresDIM <- apply(bpresDI,1,mean,na.rm=TRUE)
summary(bpresDIM)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0.00 58.00 66.67 65.44 74.67 128.00 2282
bpresSY <- examination[,c("BPXSY1","BPXSY2","BPXSY3","BPXSY4")]
examination[,c("BPXSY1","BPXSY2","BPXSY3","BPXSY4")] <- NULL
bpresSYM <- apply(bpresSY,1,mean,na.rm=TRUE)
summary(bpresSYM)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 64.67 106.00 115.33 118.31 128.00 228.67 2282
examination$PEASCST1 <- NULL
examination$bpresDIM <- bpresDIM
examination$bpresSYM <- bpresSYM
keepColumn <- apply(!is.na(examination),2,sum)/nrow(examination) > 0.75
examination <- examination[,keepColumn]
keepColumnVar <- !is.na(apply(examination,2,var,na.rm = TRUE))
examination <- examination[,keepColumnVar]
examination <- examination[complete.cases(examination),]
examination <- examination[examination$bpresDIM>0,]
## questionnaire data
questionnaire <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/questionnaire.xlsx"))
questionnaireDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/questionnaire.xlsx",sheet = "Sheet1"))
rownames(questionnaire) <- questionnaire$SEQN
questionnaire$SEQN <- NULL
table(questionnaire$MCQ010)
#>
#> 1 2 7 9
#> 1538 8222 1 8
#keepColumn <- apply(!is.na(questionnaire),2,sum)/nrow(questionnaire) > 0.85
#questionnaire <- questionnaire[,keepColumn]
#keepColumnVar <- !is.na(apply(questionnaire,2,var,na.rm = TRUE))
#questionnaire <- questionnaire[,keepColumnVar]
#questionnaire <- questionnaire[complete.cases(questionnaire),]
#keeprow <- apply(questionnaire,2,max) < 9000
#questionnaire <- questionnaire[keeprow,]
#table(questionnaire$MCQ010)
## Diabetes
Diabetes <- questionnaire$MCQ010
names(Diabetes) <- rownames(questionnaire)
Diabetes <- Diabetes[!is.na(Diabetes)]
Diabetes <- Diabetes[Diabetes<3]
table(Diabetes)
#> Diabetes
#> 1 2
#> 1538 8222
## Depression
#depression <- questionnaire[,c("DPQ010","DPQ020","DPQ030","DPQ040","DPQ050","DPQ060","DPQ070","DPQ080","DPQ090")]
#donnow <- apply(is.na(depression),1,sum) == 9
#depressionTot <- apply(depression>0,1,sum,na.rm=TRUE) > 0
depression <- questionnaire[,c("DPQ040")]
names(depression) <- rownames(questionnaire)
depression[depression>5] <- "NA"
depressionTot <- 1*(depression[!is.na(depression)] > 0)
table(depressionTot)
#> depressionTot
#> 0 1
#> 2650 2745
#depressionTot <- depressionTot[!donnow]
#table(depressionTot)
## The labs data
labs <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/labs.xlsx"))
labsDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/labs.xlsx",sheet = "Sheet1"))
rownames(labs) <- labs$SEQN
labs$SEQN <- NULL
keepColumn <- apply(!is.na(labs),2,sum)/nrow(labs) > 0.75
labs <- labs[,keepColumn]
keepColumnVar <- !is.na(apply(labs,2,var,na.rm = TRUE))
labs <- labs[,keepColumnVar]
labs <- labs[complete.cases(labs),]
includeIDS <- rownames(labs)[rownames(labs) %in% rownames(examination)]
includeIDS <- includeIDS[includeIDS %in% rownames(demographic)]
includeIDS <- includeIDS[includeIDS %in% names(Diabetes)]
Diab <- Diabetes[includeIDS]
table(Diab)
#> Diab
#> 1 2
#> 992 5006
NHNES <- cbind(demographic[includeIDS,],examination[includeIDS,],labs[includeIDS,])
NHNES$Diab <- Diab
table(NHNES$Diab)
#>
#> 1 2
#> 992 5006
NHNES <- NHNES[NHNES$RIDAGEYR >= 20,]
NHNES$Diab <- 1*(NHNES$Diab == 1)
table(NHNES$Diab)
#>
#> 0 1
#> 3664 650
includeIDS <- rownames(NHNES)
includeIDS <- includeIDS[includeIDS %in% names(depressionTot)]
depressionTot <- depressionTot[includeIDS]
NHNESDep <- cbind(NHNES[includeIDS,],depressionTot)
NHNESDep$depressionTot <- 1*NHNESDep$depressionTot
tbdep <- table(NHNESDep$depressionTot)
genderDiab <- NHNESDep[,c("RIAGENDR","Diab","depressionTot")]
isContinuous <- sapply(apply(NHNESDep,2,unique),length) > 4
sum(isContinuous)
#> [1] 62
NHNESDep <- cbind(NHNESDep[,isContinuous],genderDiab)
table(NHNESDep$depressionTot)
#>
#> 0 1
#> 2072 2080
NHNESDepMales <- subset(NHNESDep,RIAGENDR==1)
table(NHNESDepMales$depressionTot)
#>
#> 0 1
#> 1191 867
NHNESDepFemales <- subset(NHNESDep,RIAGENDR==2)
table(NHNESDepFemales$depressionTot)
#>
#> 0 1
#> 881 1213
studyName <- "NHNES"
dataframe <- NHNESDepFemales
outcome <- "depressionTot"
TopVariables <- 10
thro <- 0.40
cexheat = 0.15
Some libraries
library(psych)
library(whitening)
library("vioplot")
library("rpart")
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 2094 | 64 |
pander::pander(table(dataframe[,outcome]))
| 0 | 1 |
|---|---|
| 881 | 1213 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
largeSet <- length(varlist) > 1500
Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns
### Some global cleaning
sdiszero <- apply(dataframe,2,sd) > 1.0e-16
dataframe <- dataframe[,sdiszero]
varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
dataframe <- dataframe[,tokeep]
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples
dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
[1]
0.9986573
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> Included: 56 , Uni p: 0.01060209 , Uncorrelated Base: 16 , Outcome-Driven Size: 0 , Base Size: 16
#>
#>
1 <R=0.999,r=0.974,N= 2>, Top: 1( 1 )[ 1 : 1 Fa= 1 : 0.974 ]( 1 , 1 , 0 ),<|>Tot Used: 2 , Added: 1 , Zero Std: 0 , Max Cor: 0.950
#>
2 <R=0.950,r=0.925,N= 15>, Top: 7( 1 )[ 1 : 7 Fa= 6 : 0.925 ]( 5 , 7 , 1 ),<|>Tot Used: 14 , Added: 7 , Zero Std: 0 , Max Cor: 0.923
#>
3 <R=0.923,r=0.911,N= 15>, Top: 3( 2 )[ 1 : 3 Fa= 7 : 0.911 ]( 3 , 4 , 6 ),<|>Tot Used: 18 , Added: 4 , Zero Std: 0 , Max Cor: 0.911
#>
4 <R=0.911,r=0.905,N= 15>, Top: 1( 1 )[ 1 : 1 Fa= 7 : 0.905 ]( 1 , 1 , 7 ),<|>Tot Used: 19 , Added: 1 , Zero Std: 0 , Max Cor: 0.899
#>
5 <R=0.899,r=0.850,N= 8>, Top: 4( 1 )[ 1 : 4 Fa= 10 : 0.850 ]( 4 , 4 , 7 ),<|>Tot Used: 25 , Added: 4 , Zero Std: 0 , Max Cor: 0.845
#>
6 <R=0.845,r=0.823,N= 8>, Top: 1( 1 )[ 1 : 1 Fa= 11 : 0.823 ]( 1 , 1 , 10 ),<|>Tot Used: 27 , Added: 1 , Zero Std: 0 , Max Cor: 0.819
#>
7 <R=0.819,r=0.759,N= 6>, Top: 3( 1 )[ 1 : 3 Fa= 13 : 0.759 ]( 3 , 3 , 11 ),<|>Tot Used: 30 , Added: 3 , Zero Std: 0 , Max Cor: 0.752
#>
8 <R=0.752,r=0.726,N= 6>, Top: 3( 1 )[ 1 : 3 Fa= 13 : 0.726 ]( 2 , 2 , 13 ),<|>Tot Used: 31 , Added: 2 , Zero Std: 0 , Max Cor: 0.724
#>
9 <R=0.724,r=0.612,N= 23>, Top: 8( 1 )[ 1 : 8 Fa= 16 : 0.612 ]( 8 , 13 , 13 ),<|>Tot Used: 40 , Added: 13 , Zero Std: 0 , Max Cor: 0.970
#>
10 <R=0.970,r=0.735,N= 23>, Top: 3( 1 )[ 1 : 3 Fa= 16 : 0.735 ]( 3 , 3 , 16 ),<|>Tot Used: 40 , Added: 3 , Zero Std: 0 , Max Cor: 0.934
#>
11 <R=0.934,r=0.717,N= 23>, Top: 1( 1 )[ 1 : 1 Fa= 17 : 0.717 ]( 1 , 1 , 16 ),<|>Tot Used: 40 , Added: 1 , Zero Std: 0 , Max Cor: 0.709
#>
12 <R=0.709,r=0.479,N= 22>, Top: 10( 1 )[ 1 : 10 Fa= 21 : 0.479 ]( 8 , 12 , 17 ),<|>Tot Used: 45 , Added: 12 , Zero Std: 0 , Max Cor: 0.877
#>
13 <R=0.877,r=0.563,N= 22>, Top: 3( 1 )[ 1 : 3 Fa= 21 : 0.563 ]( 3 , 5 , 21 ),<|>Tot Used: 45 , Added: 5 , Zero Std: 0 , Max Cor: 0.715
#>
14 <R=0.715,r=0.482,N= 22>, Top: 3( 1 )[ 1 : 3 Fa= 21 : 0.482 ]( 3 , 3 , 21 ),<|>Tot Used: 45 , Added: 3 , Zero Std: 0 , Max Cor: 0.926
#>
15 <R=0.926,r=0.588,N= 22>, Top: 1( 1 )[ 1 : 1 Fa= 21 : 0.588 ]( 1 , 1 , 21 ),<|>Tot Used: 45 , Added: 1 , Zero Std: 0 , Max Cor: 0.727
#>
16 <R=0.727,r=0.413,N= 18>, Top: 9( 1 )[ 1 : 9 Fa= 24 : 0.413 ]( 8 , 9 , 21 ),<|>Tot Used: 47 , Added: 9 , Zero Std: 0 , Max Cor: 0.554
#>
17 <R=0.554,r=0.400,N= 18>, Top: 4( 1 )[ 1 : 4 Fa= 24 : 0.400 ]( 4 , 4 , 24 ),<|>Tot Used: 47 , Added: 4 , Zero Std: 0 , Max Cor: 0.459
#>
18 <R=0.459,r=0.400,N= 18>, Top: 1( 1 )[ 1 : 1 Fa= 25 : 0.400 ]( 1 , 1 , 24 ),<|>Tot Used: 47 , Added: 1 , Zero Std: 0 , Max Cor: 0.395
#>
19 <R=0.395,r=0.400,N= 0>
#>
[ 19 ], 0.3946587 Decor Dimension: 47 Nused: 47 . Cor to Base: 24 , ABase: 10 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
1.71e+09
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
8.37e+08
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
0.395
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
0.948
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPSTM <- attr(DEdataframe,"UPSTM")
gplots::heatmap.2(1.0*(abs(UPSTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
if (!largeSet)
{
cormat <- cor(DEdataframe[,varlistc],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Correlation after IDeA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
par(op)
diag(cormat) <- 0
print(max(abs(cormat)))
}
[1]
0.6574137
if (nrow(dataframe) < 1000)
{
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}
if (nrow(dataframe) < 1000)
{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| LBDHDD | 56.3 | 15.86 | 59.33 | 16.37 | 9.15e-05 | 0.555 |
| BMXWAIST | 98.9 | 17.24 | 95.77 | 15.91 | 9.16e-04 | 0.555 |
| BMXWT | 77.9 | 21.27 | 74.42 | 19.70 | 1.92e-07 | 0.551 |
| BMXBMI | 30.0 | 7.74 | 28.70 | 6.97 | 9.56e-06 | 0.550 |
| BPXPLS | 74.7 | 11.80 | 72.77 | 11.16 | 3.12e-06 | 0.550 |
| LBXWBCSI | 7.6 | 2.75 | 7.18 | 2.10 | 3.96e-05 | 0.545 |
| LBDNENO | 4.5 | 1.94 | 4.19 | 1.62 | 3.25e-06 | 0.542 |
| BMXARMC | 32.6 | 5.62 | 31.89 | 5.20 | 3.12e-05 | 0.540 |
| RIDAGEYR | 47.2 | 17.10 | 49.41 | 17.13 | 2.92e-04 | 0.538 |
| URXUCR.x | 104.7 | 71.22 | 97.45 | 71.12 | 1.18e-09 | 0.536 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]
pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| LBDHDD | 56.346 | 15.86 | 59.327 | 16.37 | 9.15e-05 | 0.555 |
| La_MGDCGSZ | 70.533 | 10.42 | 72.595 | 9.98 | 1.26e-01 | 0.553 |
| BMXWT | 77.938 | 21.27 | 74.417 | 19.70 | 1.92e-07 | 0.551 |
| BPXPLS | 74.653 | 11.80 | 72.767 | 11.16 | 3.12e-06 | 0.550 |
| LBXWBCSI | 7.599 | 2.75 | 7.175 | 2.10 | 3.96e-05 | 0.545 |
| RIDAGEYR | 47.216 | 17.10 | 49.407 | 17.13 | 2.92e-04 | 0.538 |
| La_LBXRDW | 37.247 | 1.30 | 37.072 | 1.12 | 2.35e-08 | 0.537 |
| URXUCR.x | 104.722 | 71.22 | 97.451 | 71.12 | 1.18e-09 | 0.536 |
| La_LBXNEPCT | 93.066 | 2.05 | 92.818 | 2.32 | 9.01e-06 | 0.535 |
| Diab | 953.000 | 260.00 | 752.000 | 129.00 | NA | 0.534 |
| La_BMXWAIST | 112.144 | 5.99 | 111.520 | 6.15 | 6.60e-01 | 0.529 |
| La_MGXH2T2 | -0.682 | 1.86 | -0.528 | 1.79 | 1.65e-07 | 0.527 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
| mean | total | fraction |
|---|---|---|
| 3.06 | 35 | 0.593 |
allSigvars <- names(dc)
dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
coef <- theFormulas[[dx]]
cname <- names(theFormulas[[dx]])
names(cname) <- cname
for (cf in names(coef))
{
if (cf != dx)
{
if (coef[cf]>0)
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
}
else
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("%5.3f*%s",coef[cf],cname[cf]))
}
}
}
}
finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| DecorFormula | caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | RAWAUC | fscores | |
|---|---|---|---|---|---|---|---|---|---|
| LBDHDD | 56.346 | 15.86 | 59.327 | 16.37 | 9.15e-05 | 0.555 | 0.555 | NA | |
| BMXWAIST | NA | 98.945 | 17.24 | 95.773 | 15.91 | 9.16e-04 | 0.555 | 0.555 | NA |
| La_MGDCGSZ | + 0.314RIDAGEYR + 1.000MGDCGSZ | 70.533 | 10.42 | 72.595 | 9.98 | 1.26e-01 | 0.553 | 0.528 | 5 |
| BMXWT | 77.938 | 21.27 | 74.417 | 19.70 | 1.92e-07 | 0.551 | 0.551 | 5 | |
| BPXPLS | 74.653 | 11.80 | 72.767 | 11.16 | 3.12e-06 | 0.550 | 0.550 | NA | |
| LBXWBCSI | 7.599 | 2.75 | 7.175 | 2.10 | 3.96e-05 | 0.545 | 0.545 | 6 | |
| RIDAGEYR | 47.216 | 17.10 | 49.407 | 17.13 | 2.92e-04 | 0.538 | 0.538 | 2 | |
| La_LBXRDW | + 0.695LBXMC + 1.000LBXRDW | 37.247 | 1.30 | 37.072 | 1.12 | 2.35e-08 | 0.537 | 0.513 | -1 |
| URXUCR.x | 104.722 | 71.22 | 97.451 | 71.12 | 1.18e-09 | 0.536 | 0.536 | NA | |
| MGXH2T2 | NA | 26.701 | 6.25 | 27.558 | 5.78 | 4.09e-01 | 0.535 | 0.535 | NA |
| La_LBXNEPCT | + 1.030LBXLYPCT + 1.000LBXNEPCT + 1.153*LBXEOPCT | 93.066 | 2.05 | 92.818 | 2.32 | 9.01e-06 | 0.535 | 0.525 | 3 |
| Diab | 953.000 | 260.00 | 752.000 | 129.00 | NA | 0.534 | 0.534 | NA | |
| La_BMXWAIST | -0.741BMXWT + 0.441BMXHT + 1.000*BMXWAIST | 112.144 | 5.99 | 111.520 | 6.15 | 6.60e-01 | 0.529 | 0.555 | -2 |
| MGDCGSZ | NA | 55.718 | 11.88 | 57.093 | 11.17 | 5.87e-01 | 0.528 | 0.528 | NA |
| La_MGXH2T2 | + 1.000MGXH2T2 + 0.569MGXH1T3 -0.767*MGDCGSZ | -0.682 | 1.86 | -0.528 | 1.79 | 1.65e-07 | 0.527 | 0.535 | -2 |
| MGXH1T3 | NA | 26.972 | 6.28 | 27.589 | 5.85 | 6.88e-01 | 0.526 | 0.526 | NA |
| LBXNEPCT | NA | 58.254 | 9.43 | 57.404 | 9.06 | 5.22e-02 | 0.525 | 0.525 | NA |
| LBXLYPCT | NA | 30.957 | 8.73 | 31.509 | 8.18 | 1.68e-02 | 0.518 | 0.518 | 4 |
| LBXMC | NA | 33.637 | 1.07 | 33.558 | 0.95 | 1.82e-04 | 0.516 | 0.516 | NA |
| LBXRDW | NA | 13.864 | 1.49 | 13.744 | 1.32 | 0.00e+00 | 0.513 | 0.513 | NA |
| BMXHT | NA | 160.982 | 6.86 | 160.846 | 6.95 | 7.15e-01 | 0.509 | 0.509 | 5 |
| LBXEOPCT | NA | 2.527 | 1.79 | 2.555 | 1.98 | 3.00e-13 | 0.501 | 0.501 | 5 |
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}
pander::pander(table(dataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 205 | 676 |
| 1 | 160 | 1053 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.826 | 0.8088 | 0.842 |
| tp | 0.579 | 0.5578 | 0.601 |
| se | 0.868 | 0.8477 | 0.887 |
| sp | 0.233 | 0.2052 | 0.262 |
| diag.ac | 0.601 | 0.5794 | 0.622 |
| diag.or | 1.996 | 1.5886 | 2.507 |
| nndx | 9.922 | 6.7264 | 18.904 |
| youden | 0.101 | 0.0529 | 0.149 |
| pv.pos | 0.609 | 0.5856 | 0.632 |
| pv.neg | 0.562 | 0.5090 | 0.613 |
| lr.pos | 1.131 | 1.0843 | 1.180 |
| lr.neg | 0.567 | 0.4699 | 0.684 |
| p.rout | 0.174 | 0.1583 | 0.191 |
| p.rin | 0.826 | 0.8088 | 0.842 |
| p.tpdn | 0.767 | 0.7380 | 0.795 |
| p.tndp | 0.132 | 0.1134 | 0.152 |
| p.dntp | 0.391 | 0.3679 | 0.414 |
| p.dptn | 0.438 | 0.3868 | 0.491 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 1053 | 676 | 1729 |
| Test - | 160 | 205 | 365 |
| Total | 1213 | 881 | 2094 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.601 | 0.579 | 0.622 |
| 3 | se | 0.868 | 0.848 | 0.887 |
| 4 | sp | 0.233 | 0.205 | 0.262 |
| 6 | diag.or | 1.996 | 1.589 | 2.507 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}
pander::pander(table(DEdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 411 | 470 |
| 1 | 366 | 847 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.629 | 0.608 | 0.650 |
| tp | 0.579 | 0.558 | 0.601 |
| se | 0.698 | 0.672 | 0.724 |
| sp | 0.467 | 0.433 | 0.500 |
| diag.ac | 0.601 | 0.579 | 0.622 |
| diag.or | 2.024 | 1.690 | 2.424 |
| nndx | 6.069 | 4.462 | 9.548 |
| youden | 0.165 | 0.105 | 0.224 |
| pv.pos | 0.643 | 0.617 | 0.669 |
| pv.neg | 0.529 | 0.493 | 0.565 |
| lr.pos | 1.309 | 1.218 | 1.407 |
| lr.neg | 0.647 | 0.579 | 0.723 |
| p.rout | 0.371 | 0.350 | 0.392 |
| p.rin | 0.629 | 0.608 | 0.650 |
| p.tpdn | 0.533 | 0.500 | 0.567 |
| p.tndp | 0.302 | 0.276 | 0.328 |
| p.dntp | 0.357 | 0.331 | 0.383 |
| p.dptn | 0.471 | 0.435 | 0.507 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 847 | 470 | 1317 |
| Test - | 366 | 411 | 777 |
| Total | 1213 | 881 | 2094 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.601 | 0.579 | 0.622 |
| 3 | se | 0.698 | 0.672 | 0.724 |
| 4 | sp | 0.467 | 0.433 | 0.500 |
| 6 | diag.or | 2.024 | 1.690 | 2.424 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}
pander::pander(table(PCAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 0 | 881 |
| 1 | 0 | 1213 |
pander::pander(ptab)
er: Error
detail:
| NA |
| NA |
| NA |
| NA |
| NA |
| NA |
pander::pander(ptab$detail[c(5,3,4,6),])
NA, NA, NA and NA
par(op)
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}
pander::pander(table(EFAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 0 | 881 |
| 1 | 0 | 1213 |
pander::pander(ptab)
er: Error
detail:
| NA |
| NA |
| NA |
| NA |
| NA |
| NA |
pander::pander(ptab$detail[c(5,3,4,6),])
NA, NA, NA and NA
par(op)
theLaFormulas <- getLatentCoefficients(DEdataframe)
## Add the variable description
allvardes <- rbind(demographiDesc[,1:2],examinationDesc[,1:2],labsDesc[,1:2])
dup <- duplicated(allvardes[,1])
allvardes <- allvardes[!dup,]
rownames(allvardes) <- allvardes[,1]
lavar <- names(theLaFormulas)[1]
theLaFormulas[[lavar]]
#> WTINT2YR WTMEC2YR
#> -1.025106 1.000000
for (lavar in names(theLaFormulas))
{
theLaFormulas[[lavar]]$Desc <- paste("(",allvardes[names(theLaFormulas[[lavar]]),2],")\n\r",sep="")
}
pander::pander(theLaFormulas)
La_WTMEC2YR:
_ and _(Full sample 2 year MEC exam weight.)
_
La_BPXML1:
_ and _(MIL: maximum inflation levels (mm Hg))
_
La_BMXBMI:
, (Standing Height (cm))
_ and _(Body Mass Index (kg/m**2))
_
La_BMXLEG:
_ and _(Upper Leg Length (cm))
_
La_BMXARML:
, (Standing Height (cm))
_ and _(Upper Arm Length (cm))
_
La_BMXARMC:
, (Standing Height (cm))
_ and _(Arm Circumference (cm))
_
La_BMXWAIST:
, (Standing Height (cm))
_ and _(Waist Circumference (cm))
_
La_MGXH1T1:
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_MGXH2T1:
, (Grip strength (kg), hand 1, test 3)
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_MGXH1T2:
, (Grip strength (kg), hand 1, test 3)
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_MGXH2T2:
, (Grip strength (kg), hand 1, test 3)
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_MGXH1T3:
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_MGXH2T3:
, (Grip strength (kg), hand 1, test 3)
, (Grip strength (kg), hand 2, test 3)
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_MGDCGSZ:
_ and _(Combined grip strength (kg): the sum of the largest reading from each hand.)
_
La_OHX04TC:
_ and _(Tooth Count: Upper left cuspid (C))
_
La_OHX06TC:
_ and _(Tooth Count: Upper left cuspid (C))
_
La_OHX10TC:
_ and _(Tooth Count: Upper left cuspid (C))
_
La_OHX13TC:
_ and _(Tooth Count: Upper left 2nd bicuspid/2nd primary molar (2B))
_
La_OHX20TC:
_ and _(Tooth Count: Lower right 2nd bicuspid/2nd primary molar (2B))
_
La_OHX29TC:
_ and _(Tooth Count: Lower right 2nd bicuspid/2nd primary molar (2B))
_
La_bpresSYM:
_ and _(NA)
_
La_URDACT:
_ and _(Albumin creatinine ratio (mg/g))
_
La_LBXMOPCT:
, (Monocyte percent (%))
, (Segmented neutrophils percent (%))
, (Eosinophils percent (%))
_ and _(Basophils percent (%))
_
La_LBXNEPCT:
, (Segmented neutrophils percent (%))
_ and _(Eosinophils percent (%))
_
La_LBDLYMNO:
, (Lymphocyte percent (%))
_ and _(Lymphocyte number (1000 cells/uL))
_
La_LBDMONO:
, (Lymphocyte percent (%))
, (Segmented neutrophils percent (%))
, (Eosinophils percent (%))
, (Lymphocyte number (1000 cells/uL))
, (Monocyte number (1000 cells/uL))
_ and _(Segmented neutrophils num (1000 cell/uL))
_
La_LBDNENO:
, (Lymphocyte percent (%))
, (Segmented neutrophils percent (%))
, (Eosinophils percent (%))
, (Lymphocyte number (1000 cells/uL))
_ and _(Segmented neutrophils num (1000 cell/uL))
_
La_LBDEONO:
, (Lymphocyte percent (%))
, (Segmented neutrophils percent (%))
, (Eosinophils percent (%))
, (Lymphocyte number (1000 cells/uL))
, (Segmented neutrophils num (1000 cell/uL))
_ and _(Eosinophils number (1000 cells/uL))
_
La_LBXHGB:
_ and _(Hemoglobin (g/dL))
_
La_LBXHCT:
, (Hematocrit (%))
_ and _(Mean cell hemoglobin concentration (g/dL))
_
La_LBXMCVSI:
, (Hemoglobin (g/dL))
, (Hematocrit (%))
, (Mean cell volume (fL))
, (Mean cell hemoglobin (pg))
_ and _(Mean cell hemoglobin concentration (g/dL))
_
La_LBXMCHSI:
, (Hemoglobin (g/dL))
, (Mean cell hemoglobin (pg))
_ and _(Mean cell hemoglobin concentration (g/dL))
_
La_LBXMC:
, (Hemoglobin (g/dL))
, (Mean cell hemoglobin (pg))
_ and _(Mean cell hemoglobin concentration (g/dL))
_
La_LBXRDW:
_ and _(Red cell distribution width (%))
_
La_LBXMPSI:
_ and _(Mean platelet volume (fL))
_